import difflib
import sys
import os
import re
import cProfile
import io
import pstats

def calculate_similarity(original_text, plagiarized_text, *, autojunk=False):
    """Return how similar two texts are, as a percentage between 0 and 100.

    The score is ``difflib.SequenceMatcher(...).ratio() * 100``, i.e.
    ``2 * M / T * 100`` where ``M`` is the number of matched items and ``T``
    is the combined length of both inputs. Two empty inputs score 100.

    Args:
        original_text: The reference text (sequence ``a`` of the matcher).
        plagiarized_text: The text to compare against the reference
            (sequence ``b`` of the matcher).
        autojunk: Forwarded to ``SequenceMatcher``. Defaults to ``False``
            because the heuristic, once ``plagiarized_text`` has 200 or more
            items, drops frequently occurring items from the match index and
            can badly under-report similarity on ordinary prose. Pass
            ``True`` to trade accuracy for speed on very large inputs.

    Returns:
        The similarity as a float percentage.
    """
    matcher = difflib.SequenceMatcher(
        None, original_text, plagiarized_text, autojunk=autojunk
    )
    return matcher.ratio() * 100

def check_file_paths(original_file_path, plagiarized_file_path):
    """Validate that both input paths follow the expected naming scheme.

    The original file must be exactly ``./test/orig.txt`` and the plagiarized
    file must look like ``./test/orig_0.8_<something>.txt``. The original path
    is checked first; on the first mismatch a message is printed to stdout
    and no further checks are made.

    Args:
        original_file_path: Path given for the original text.
        plagiarized_file_path: Path given for the plagiarized text.

    Returns:
        ``True`` when both paths match their pattern, ``False`` otherwise.
    """
    # (pattern, candidate path, failure reason), evaluated in order.
    rules = (
        (r'^\.\/test\/orig\.txt$',
         original_file_path,
         "Invalid original file path."),
        (r'^\.\/test\/orig_0\.8_.+\.txt$',
         plagiarized_file_path,
         "Invalid plagiarized file path."),
    )

    for pattern, candidate, reason in rules:
        if re.match(pattern, candidate) is None:
            print("Invalid file path:", reason)
            return False

    return True

def main():
    """Command-line entry point: compare two text files and record the result.

    Expects exactly three arguments after the script name: the original file,
    the plagiarized file, and the answer file. After validating the two input
    paths it reads both files as UTF-8, computes their similarity while a
    ``cProfile`` profiler is running, writes the profiler report (sorted by
    cumulative time) to ``cprofile_output.profile`` in the current directory,
    and writes the similarity percentage to the answer file.

    Returns ``None`` in every case; on a wrong argument count or an invalid
    path it returns early without reading or writing any file.
    """
    # argv[0] is the script itself, so four entries means three user arguments.
    if len(sys.argv) != 4:
        print("Usage: python main.py <original_file> <plagiarized_file> <answer_file>")
        return

    source_path, suspect_path, result_path = sys.argv[1:4]

    # Refuse to continue when either input path has an unexpected format.
    paths_ok = check_file_paths(source_path, suspect_path)
    if not paths_ok:
        return

    # Load both documents fully into memory.
    with open(source_path, 'r', encoding='utf-8') as source_file:
        source_text = source_file.read()
    with open(suspect_path, 'r', encoding='utf-8') as suspect_file:
        suspect_text = suspect_file.read()

    # Profile only the similarity computation, not the file I/O around it.
    profiler = cProfile.Profile()
    profiler.enable()
    similarity = calculate_similarity(source_text, suspect_text)
    profiler.disable()

    # Render the profiler statistics as text and save them next to the script's
    # working directory.
    with open('cprofile_output.profile', 'w', encoding='utf-8') as report_file:
        report_buffer = io.StringIO()
        stats = pstats.Stats(profiler, stream=report_buffer)
        stats.sort_stats('cumulative')
        stats.print_stats()
        report_file.write(report_buffer.getvalue())

    # Persist the result with two decimal places.
    answer_line = "重复率：{:.2f}%".format(similarity)
    with open(result_path, 'w', encoding='utf-8') as result_file:
        result_file.write(answer_line)

# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
